### Gender and Family Ties in Latin American Legislatures
### Replication File
### July 11, 2019
# Start from an empty workspace: removes every object (including dot-prefixed
# ones) from the environment in which this script is run.
rm(list = ls(all.names = TRUE))

# Loading relevant packages
library(foreign)
library(plyr)
library(optmatch)
library(rcbalance)

# Importing the data
# The project folder defaults to the original author's Dropbox location. Set the
# GENDER_LEGACIES_DIR environment variable to run the replication on another
# machine without editing this file.
project.dir <- Sys.getenv("GENDER_LEGACIES_DIR")
if (!nzchar(project.dir)) {
  project.dir <- "/Users/franciscocantu/Dropbox/Gender and Legacies"
}
data <- read.csv(file.path(project.dir, "dataforR.csv"))

# Keeping the relevant variables: the treatment indicator (gender), the
# covariates used for matching (age, education, ideology_legislator), the
# exact-matching key (partycode), and the remaining columns that are carried
# through to the matched data set.
d <- data[, c("gender", "age", "education", "ideology_legislator", "nestu",
              "partycode", "legacy", "couname", "surveywave", "ccode")]


#rcbalance
# We first define the variables we will use to estimate the Mahalanobis distance
maha.vars <- c("age", "education", "ideology_legislator")  # columns of d passed as X below


# Create distance structure:
# We ask to find a match for every treatment unit (defined by gender) that minimizes the Mahalanobis distance
# We also ask to only choose a control unit from the same party as the treated unit. This restriction constrains the matching not only to parties but also to countries
# We define a caliper of 0.2. NOTE: because calip.option = "propensity" below, the caliper is applied to a propensity score estimated from these covariates (0.2 standard deviations of that score, per the rcbalance documentation) rather than to the Mahalanobis distance itself
my.dist <- build.dist.struct(
  z = d$gender,                  # treatment indicator
  X = d[maha.vars],              # covariates used for the distance
  exact = d$partycode,           # only pair units that share a party code
  calip.option = "propensity",
  caliper = 0.2
)

# Now we coarsen a few continuous variables to refine the balance
# Age indicators age1-age5: (.., 30], (30, 40], (40, 50], (50, 60], (60, ..).
# Each column is a logical vector; missing ages stay NA in every indicator.
d[paste0("age", 1:5)] <- with(d, list(
  age <= 30,
  age > 30 & age <= 40,
  age > 40 & age <= 50,
  age > 50 & age <= 60,
  age > 60
))

# Education indicators education1-education5: codes up to 2 are pooled into
# the first indicator; codes 3, 4, 5 and 6 each get their own.
d[paste0("education", 1:5)] <- with(d, list(
  education <= 2,
  education == 3,
  education == 4,
  education == 5,
  education == 6
))


# Define fine balance levels
# Names of the ten indicator columns (five age brackets, five education levels)
l1 <- c(paste0("age", 1:5), paste0("education", 1:5))


# We now match the units.
# We use the distance structure created before (my.dist) and specify the matching by prioritizing the age and the education categories of the legislator.
# The matching is specified to produce the best refined covariate balance subject to the ideology of the legislator, matching exactly on this variable wherever possible.
# We allow treated units to be excluded, so that observations without control units are not considered.
# Split the database by gender once; the same two subsets are handed to the
# matching routine and later used to pull out the matched rows.
treated.info <- d[d$gender == 1, ]
control.info <- d[d$gender == 0, ]

match.out <- rcbalance(
  my.dist,
  near.exact = "ideology_legislator",
  fb.list = list(l1),
  treated.info = treated.info,
  control.info = control.info,
  exclude.treated = TRUE
)

# Discard the row labels inherited from d so that each subset is labelled
# 1, 2, 3, ... in its own order; these labels are compared with the indices
# in match.out$matches below.
rownames(treated.info) <- NULL
rownames(control.info) <- NULL

# Keep (1) treated units whose label appears among the row names of the match
# matrix and (2) control units whose label appears among its entries.
femaleleg <- treated.info[rownames(treated.info) %in% rownames(match.out$matches), ]
maleleg <- control.info[rownames(control.info) %in% match.out$matches, ]

# Merging both subsets
# Stack the matched treated (female) and control (male) legislators
matcheddata <- rbind(femaleleg, maleleg)

# Saving the data to .dta format.
# The project folder defaults to the original author's Dropbox location. Set the
# GENDER_LEGACIES_DIR environment variable to write the output elsewhere without
# editing this file; the file goes to the "Dataset" subfolder of that directory.
project.dir <- Sys.getenv("GENDER_LEGACIES_DIR")
if (!nzchar(project.dir)) {
  project.dir <- "/Users/franciscocantu/Dropbox/Gender and Legacies"
}
write.dta(matcheddata, file.path(project.dir, "Dataset", "matcheddata.dta"))